http://demo.slashdb.com/db/pystreet.html
In [1]:
%matplotlib inline
import pandas
import numpy
# Render every float as a dollar amount with thousands separators and two decimals.
pandas.set_option('display.float_format', '${:,.2f}'.format)

# Survey responses served as JSON: all countries, then the United States subset.
all_responses_url = 'http://demo.slashdb.com/db/pystreet/response_usd.json'
usa_responses_url = 'http://demo.slashdb.com/db/pystreet/response_usd/country/United%20States.json'
responses_all = pandas.read_json(all_responses_url)
responses_usa = pandas.read_json(usa_responses_url)
In [12]:
# Display the responses loaded from the `country/United%20States` endpoint.
responses_usa
Out[12]:
In [13]:
# Number of responses per country, ordered from fewest to most.
country_groups = responses_all.groupby('country')
by_country = country_groups.size().sort_values()

# Horizontal bars keep the country names readable on the y-axis.
by_country.plot.barh(figsize=(7, 10), fontsize=15)
Out[13]:
In [5]:
# Keep only rows whose current salary lies strictly between the two cutoffs
# (both bounds are exclusive).
min_salary_cutoff = 500
max_salary_cutoff = 800000

in_range_all = (
    (responses_all['salary_usd'] > min_salary_cutoff)
    & (responses_all['salary_usd'] < max_salary_cutoff)
)
responses_all = responses_all[in_range_all]

in_range_usa = (
    (responses_usa['salary_usd'] > min_salary_cutoff)
    & (responses_usa['salary_usd'] < max_salary_cutoff)
)
responses_usa = responses_usa[in_range_usa]
In [6]:
# Summary statistics (min, max, mean, median) of the current salary,
# one row for U.S. respondents and one for all respondents.
salary = responses_all["salary_usd"]
salary_usa = responses_usa["salary_usd"]

# `columns` fixes the column order and `index` labels the rows, so no
# reordering or re-indexing step is needed after construction.
df = pandas.DataFrame(
    [
        {'Min': s.min(), 'Max': s.max(), 'Avg': s.mean(), 'Median': s.median()}
        for s in (salary_usa, salary)
    ],
    index=['USA', 'Worldwide'],
    columns=['Min', 'Max', 'Avg', 'Median'],
)
df
Out[6]:
In [7]:
# Summary statistics (min, max, mean, median) of the desired salary,
# one row for U.S. respondents and one for all respondents.
desired_salary = responses_all["desired_salary_usd"]
desired_salary_usa = responses_usa["desired_salary_usd"]

summary_columns = ['Min', 'Max', 'Avg', 'Median']
d = [
    dict(zip(summary_columns, (col.min(), col.max(), col.mean(), col.median())))
    for col in (desired_salary_usa, desired_salary)
]

# Select the columns explicitly so the table order does not depend on dict ordering.
df = pandas.DataFrame.from_records(d)[summary_columns]
df.index = ['USA', 'Worldwide']
df
Out[7]:
In [8]:
# Histograms of current and desired salary for U.S. respondents.
# Bin edges run from 0 to 500,000 inclusive in steps of 25,000.
plot = responses_usa.hist(
    column=['salary_usd', 'desired_salary_usd'],
    bins=list(range(0, 500001, 25000)),
    figsize=(20, 5),
    xrot=45,
    xlabelsize=15,
    ylabelsize=15,  # same tick-label size as the worldwide histogram
)
In [9]:
# Histograms of current and desired salary for all respondents.
# Bin edges run from 0 to 500,000 inclusive in steps of 25,000.
plot = responses_all.hist(
    column=['salary_usd', 'desired_salary_usd'],
    bins=list(range(0, 500001, 25000)),
    figsize=(20, 5),
    xrot=45,
    xlabelsize=15,
    ylabelsize=15,
)
In [10]:
# Mean current salary next to mean desired salary, across all respondents.
mean_salaries = responses_all[['salary_usd', 'desired_salary_usd']].mean()
plot = mean_salaries.plot.bar(
    figsize=(7, 7),
    fontsize=15,
    title="Average salary and average desired salary worldwide",
)

# Per-respondent gap between desired and current salary, relative to current salary.
current_salary = responses_all['salary_usd']
relative_gap = (responses_all['desired_salary_usd'] - current_salary) / current_salary

print("At a maximum {0:.0%}".format(relative_gap.max()))
print("On average {0:.0%}".format(relative_gap.mean()))
print("At a minimum {0:.0%}".format(relative_gap.min()))
In [11]:
# Lowest and highest reported salary for each value of `years_experience`,
# plotted first for U.S. respondents and then for all respondents.
#
# The aggregations are named as strings ('min', 'max') instead of importing
# numpy's `max`/`min` into the notebook namespace: that import replaced the
# built-in min()/max() for every cell run afterwards, and the resulting column
# labels depended on how NumPy names those functions.
df = responses_usa[['years_experience', 'salary_usd']]
g = df.groupby('years_experience')
df = g.agg(['min', 'max'])
df.plot(kind="line", figsize=(12, 7), title="U.S. Python developer salary range as a function of experience.")

df = responses_all[['years_experience', 'salary_usd']]
g = df.groupby('years_experience')
df = g.agg(['min', 'max'])
df.plot(kind="line", figsize=(12, 7), title="Worldwide Python developer salary range as a function of experience.")
Out[11]:
Pystreet is an up-and-coming online community of professional Python developers. We are currently in pre-launch mode at http://pystreet.com
SlashDB is a new kind of middleware that instantly creates REST APIs for SQL databases, making their content accessible to authorized web, mobile and enterprise applications and end-users in standard data formats for reading and writing. http://www.slashdb.com/